********************************************************************************
**	PURPOSE: Run table 5 but with variables broken down to their components, create AT5a, AT5b
**
**	INPUTS: HTE_ficoscore08_numloans_e1_df
**			HTE_ficoscore08_numloans_e2_df
**			HTE_ficoscore08_numloans_e3_df
**			HTE_scoredf_numloans_e1_df
**			HTE_scoredf_numloans_e2_df
**			HTE_scoredf_numloans_e3_df
**	
**	INTERMEDIATE OUTPUTS: HTE_ficoscore08_e(1/2/3)_df_comp.dta
**			 			  HTE_scoredf_e(1/2/3)_df_comp.dta
**
**	FINAL OUTPUTS: Het_Search_ficoscore08_ e(1/2/3)Ter_comp.xlsx 
**			 	  Het_Search_scoredf_ e(1/2/3)Ter_comp.xlsx (not currently written: the export step only runs for ficoscore08)
**			 
**  CREATED/EDITED BY: Kayla Wilding, Leah Kim, Hasan Ahamed
**
**	DATE CREATED: 7/2018
**
**	DATE LAST EDITED: 3/1/2023
********************************************************************************
clear all

* For each source global (risk, contknow, liqcf) build a companion global
* named <stem>_b that holds every entry of the source global with the
* suffix "1" appended, in the same order.
foreach stem in risk contknow liqcf {
	* Start from an empty accumulator for this stem
	local suffixed
	foreach item in ${`stem'} {
		local suffixed `suffixed' `item'1
	}
	global `stem'_b `suffixed'
}

* Covariates examined for heterogeneity, in output order. The globals are
* expanded here, at definition time.
local hetvars age female married adults child race_black college
local hetvars `hetvars' $risk_b $contknow_b $liqcf_b
local hetvars `hetvars' bfico $credac $credac_rev prevloans1

* Covariates compared on their 0/1 values in the subgroup regressions
local binaryvars female race_black married college
local binaryvars `binaryvars' checkcs1 creport1 creditc1 healthc1 agec1 borrowc1 repayc1
local binaryvars `binaryvars' incgt301 savslccu60more1 openinstallB1 opentradeB1

* Covariates compared on their bottom vs. top tercile in the subgroup regressions
local contvars age adults child
local contvars `contvars' revbvrisk1 revbvafford1 bvtoday1 revbvlongterm1 bvregret1
local contvars `contvars' bvtask1 bvstress1 bvbills1 revbvneed1 revfinsit1
local contvars `contvars' savingsbal_mehb951 bfico
local contvars `contvars' ihs_openinstalltr1 ihs_inquiry121 ihs_openrevolvtr1 ihs_usecreditoptrst1 prevloans1

set autotabgraphs on
*set trace on
* Loop over each dependent variable and each endline (one causal-forest
* prediction file per combination). For every combination: import the
* predictions, merge in the baseline covariates, build tercile flags and save
* the intermediate dataset. For ficoscore08 only, additionally fill a results
* matrix and export it to Excel. The whole loop runs quietly.
qui foreach dep in ficoscore08 scoredf { 
	forval x = 1/3 {
		import delimited using "$outputtables//HTE_`dep'_numloans_e`x'_df.csv", clear
		* keep(1 3): keep every row of the imported file, matched or not
		merge 1:1 surveyid using "$adta/Wide with outcomes.dta", keepusing($risk_b $contknow_b $liqcf_b $credac $credac_rev prevloans1) keep(1 3)
		
		// Labelling Top/Bottom Terciles
		*Split the CATE variable into terciles 
		xtile ter_`dep' = t1_s1_pred,n(3)
		tab ter_`dep', gen(flag_ter_`dep')
		* NOTE(review): a missing tercile yields 0 (not missing) in both flags below;
		* the regressions in this file restrict to inlist(ter_`dep',1,3), but confirm
		* that users of the saved .dta expect this coding.
		gen highest_ter = (ter_`dep'==3)
		gen lowest_ter = (ter_`dep'==1)	
		
		*Split the baseline vars into terciles			
		foreach covar in `hetvars' {
			* bfico is skipped when the dependent variable is scoredf
			if "`dep'" == "scoredf" & "`covar'" == "bfico" {
				continue
			}
			xtile `covar'3ile = `covar', n(3)
			* NOTE(review): same missing-coded-as-0 behaviour as above
			gen h_ter`covar' = (`covar'3ile==3)
			gen l_ter`covar' = (`covar'3ile==1)												
		}	
		
		save "$adta/HTE_`dep'_e`x'_df_comp.dta", replace
		
		** Reg of outcome on each baseline var
		*Mean (SE) of Baseline Variables for Observations in 
		*Terciles of Predicted Treatment Effect (CATE)
		if "`dep'" == "ficoscore08" {											
			** Setting up Results Matrix
			* Two rows per covariate: first row holds estimates and p-values,
			* second row holds standard errors.
			mat results = J(`:word count `hetvars''*2,6,.)
			* results_q is allocated here but never filled in this file
			mat results_q = J(`:word count `hetvars''*2,2,.)
			mat coln results = L_ter_CATE H_ter_CATE P_CATE L_ter_char H_ter_char P_char 
			
			* Repeat each covariate name twice so the row names line up with the
			* estimate/SE row pairs (previously N names were applied to 2N rows).
			loc rowlabels
			foreach v of local hetvars {
				loc rowlabels `rowlabels' `v' `v'
			}
			mat rown results = `rowlabels'

			loc row_b = 1
			foreach regvar in `hetvars' {
				*Output the mean of the baseline variable by tercile of CATEs
				* No constant, so the two coefficients are the group means
				reg `regvar' lowest_ter highest_ter if inlist(ter_`dep',1,3), nocons
				mat raw = r(table)
				** Coefs (row 1 of r(table)) and SEs (row 2 of r(table))
				mat results[`row_b',1]=raw[1,1]
				mat results[`row_b',2]=raw[1,2]
				mat results[`row_b'+1,1]=raw[2,1]
				mat results[`row_b'+1,2]=raw[2,2]
			
				* Difference test
				test highest_ter == lowest_ter
				mat results[`row_b',3]=`r(p)'
				
				* Highest tercile code actually present for this covariate
				sum `regvar'3ile
				loc max `r(max)'

				* Coefficient on enc (and its SE) within the low and high group of
				* the baseline variable: 0 vs 1 for binary covariates, bottom vs
				* top tercile otherwise.
				if (`:list regvar in binaryvars') {
					reg `dep' enc if `regvar' == 0
					mat results[`row_b',4]=_b[enc]
					mat results[`row_b'+1,4]=_se[enc]
					
					reg `dep' enc if `regvar' == 1
					mat results[`row_b',5]=_b[enc]
					mat results[`row_b'+1,5]=_se[enc]
				}
				else if (`:list regvar in contvars') {
					reg `dep' enc if `regvar'3ile == 1
					mat results[`row_b',4]=_b[enc]
					mat results[`row_b'+1,4]=_se[enc]
					
					reg `dep' enc if `regvar'3ile == `max'
					mat results[`row_b',5]=_b[enc]
					mat results[`row_b'+1,5]=_se[enc]
				}
							
				* P-value on the enc x group interaction. Reset first so a covariate
				* that is in neither list gets a missing value instead of reusing
				* the previous covariate's expression (or failing when undefined).
				loc coefeqpval .
				if (`:list regvar in binaryvars') {
					reg `dep' enc##i.`regvar'
					loc coefeqpval r(table)["pvalue","1.enc#1.`regvar'"]
				}
				else if (`:list regvar in contvars') {
					reg `dep' enc##i.`regvar'3ile
					loc coefeqpval r(table)["pvalue","1.enc#`max'.`regvar'3ile"]
				}
				
				* The expression stored above is evaluated here, against the
				* r(table) left by the interaction regression
				mat results[`row_b',6]=`coefeqpval'
				loc row_b =`row_b'+2
				
			} // End Het Loop
			
			putexcel set "$outputtables/Het_Search_`dep'_ e`x'Ter_comp.xlsx", replace
			putexcel B3 = matrix(results), names nformat(number_d2)
			putexcel B1 = "Terciles: `dep'_numloans_e`x'"
			* Write each covariate name into column A, one per row pair.
			* NOTE(review): with the names option the matrix body presumably starts
			* on row 4, which would put this label beside the SE row of each pair -
			* confirm the intended row.
			local nvars : word count `hetvars'
			forval y = 1/`nvars' {
				local varname : word `y' of `hetvars'
				local xlrow = `y'*2 + 3
				putexcel A`xlrow' = "`varname'"
			}
		} //Close if ficoscore								
	} //Close forval loop through endlines
} //Close loop through dependent vars 

